knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidycensus)
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(beeswarm)
library(knitr)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(readxl)
library(mapview)
library(viridis)
## Loading required package: viridisLite
library(DT)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(vtable)
## Loading required package: kableExtra
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(lfe)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(ggpubr)

mapviewPWS <- function(pwsid,...){
  url <- paste0("https://geoconnex.us/ref/pws/",pwsid)
  pws <- sf::read_sf(url)
  mapview::mapview(pws,...)
}

mapviewOptions(fgb=FALSE)

Data Preperation

Find utilities by ownership type

This is the inventory of all utilities that fill out the EAR

inv <- read_excel("Data/EAR/inventory.xlsx",
                  sheet = "SDWISWaterSystemInventory071620") %>% filter(WaterSystemType=="C")

bounds <- sf::read_sf("https://opendata.arcgis.com/datasets/fbba842bf134497c9d611ad506ec48cc_0.geojson")

We have to correct the rates data PWSIDs

rates <- readr::read_csv("Data/OWRS/summary_table_cleanedv2.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   utility_name = col_character(),
##   pwsid = col_character(),
##   effective_date = col_character(),
##   bill_frequency = col_character(),
##   bill_unit = col_character(),
##   bill_type = col_character(),
##   tier_starts = col_character(),
##   tier_prices = col_character(),
##   median_income_category = col_character(),
##   bill_frequency_update = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
corr <- read_csv("Data/OWRS/correction.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   pwsid = col_character(),
##   utility_name = col_character(),
##   `corrected rates PWSID` = col_character()
## )
rates <- left_join(rates,corr,by=c("utility_name"))
rates$pwsid.y <- NULL
rates <- rename(rates,pwsid=pwsid.x)
rates$pwsid[which(!is.na(rates$`corrected rates PWSID`))] <- rates$`corrected rates PWSID`[which(!is.na(rates$`corrected rates PWSID`))]

Now have to correct rate tier prices

# merge in original bill units
original_units <- read_csv("Data/OWRS/summary_table.csv") %>% select(utility_name,bill_unit) 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   utility_name = col_character(),
##   pwsid = col_character(),
##   effective_date = col_date(format = ""),
##   bill_frequency = col_character(),
##   bill_unit = col_character(),
##   usage_ccf = col_double(),
##   avg_household_size = col_double(),
##   et_amount = col_double(),
##   bill_type = col_character(),
##   service_charge = col_double(),
##   commodity_charge = col_double(),
##   bill = col_double(),
##   tier_starts = col_character(),
##   tier_prices = col_character(),
##   median_income_category = col_character(),
##   approximate_median_income = col_double()
## )
rates <- left_join(rates,original_units,by="utility_name") 
#recalculate tier prices that were converted from ccf to kg with appropriate conversion factor dividing by 0.748052kgal/ccf to convert back to $/ccf, and then again to convert  to $/kgal. Original calculation was in a wrong unit of $/(ccf/0.748)
rates$Tier_price[which(rates$bill_unit.y=="ccf")]<-rates$Tier_price[which(rates$bill_unit.y=="ccf")]/(0.748052^2)

Create Billing data

Now calculate prep to calculate water bills

# set values for non-discretionary water use
gpcd <- 50
days_month <- 365/12
hh_cons_person <- gpcd*days_month

#expand grid of all combinations of utility and household size (1-7)
names <- c(unique(rates$utility_name))
hhsize <- c(1:7)
hhsize <- expand.grid(hhsize,names)
names(hhsize) <- c("hhsize","utility_name")
hhsize$utility_name <- as.character(hhsize$utility_name)

#join to main table
rates <- left_join(rates,hhsize,by="utility_name")
rates$consumption <- (rates$hhsize * hh_cons_person)/1000

write function to calculate water bills

rates <- arrange(rates,pwsid,utility_name,hhsize,Tier_number)
rates$Tier_volume[which(!is.na(rates$Tier_price) & is.na(rates$Tier_volume))] <- 10^10

rates <- rates %>% 
  group_by(utility_name,hhsize) %>%
  mutate(next_tier_volume = lead(Tier_volume),
         prev_tier_volume = case_when(
           Tier_number == 1 ~ 0,
           Tier_number > 1 ~ lag(Tier_volume)
         ),
         Tier_width = case_when(
           Tier_number == 1 & !is.na(next_tier_volume) ~ Tier_volume,
           Tier_number >1 ~ Tier_volume - prev_tier_volume,
           is.na(next_tier_volume) ~ 10^10,
           ),
         vol_in_tier = case_when(
           bill_type == "Uniform" & Tier_number ==1 ~ consumption,
           consumption <= prev_tier_volume ~ 0,
           consumption > Tier_volume ~ Tier_width,
           consumption > prev_tier_volume & consumption <= Tier_volume ~ consumption - prev_tier_volume#,
          # consumption > Tier_volume ~ Tier_width,
           #consumption >= Tier_volume & is.na(next_tier_volume) ~ consumption - Tier_volume#,
           #consumption >= Tier_volume & is.na(next_tier_volume) ~ consumption - Tier_volume
         )
  ) 

rates$vol_revenue <- NA
rates$vol_revenue = rates$Tier_price * rates$vol_in_tier

rates$vol_revenue[which(rates$bill_type=="1.49*usage_ccf")] <- 1.49*rates$consumption[which(rates$bill_type=="1.49*usage_ccf")]*0.748052
rates$vol_revenue[which(rates$bill_type=="3.04*usage_ccf*1.333")] <- 3.04*rates$consumption[which(rates$bill_type=="3.04*usage_ccf*1.333")]*1.333*0.748052

bills <- rates %>% group_by(pwsid,
                            utility_name,
                            bill_type,
                            hhsize,
                            consumption,
                            service_charge) %>%
  summarise(volumetric_charge = sum(vol_revenue,na.rm=TRUE))
## `summarise()` regrouping output by 'pwsid', 'utility_name', 'bill_type', 'hhsize', 'consumption' (override with `.groups` argument)
bills$bill <- bills$service_charge + bills$volumetric_charge

Now we need to categorize them all by ownership type. To Recover ownership type, we read in the latest EAR

# ear13 <- read_tsv("Data/EAR/earsurveyresults_2013ry.zip",col_types="ccdcccdddc")
#  ear14 <- read_tsv("Data/EAR/earsurveyresults_2014ry.zip",col_types="ccdcccdddc")
#  ear15 <- read_tsv("Data/EAR/earsurveyresults_2015ry.zip",col_types="ccdcccdddc")
#  ear16 <- read_tsv("Data/EAR/earsurveyresults_2016ry.zip",col_types="ccdcccdddc")
#  ear17 <- read_tsv("Data/EAR/earsurveyresults_2017ry.zip",col_types="ccdcccdddc")
#  ear18 <- read_tsv("Data/EAR/earsurveyresults_2018ry.zip",col_types="ccdcccdddc")
#  ear19 <- read_tsv("Data/EAR/earsurveyresults_2019ry.zip",col_names=c("PWSID",
#                                                                       "Survey",
#                                                                       "Year",
#                                                                       "SectionName",
#                                                                       "QuestionName",
#                                                                       "QuestionResults",
#                                                                       "SurveyID",
#                                                                       "SectionID",
#                                                                       "QuestionID",
#                                                                       "Pk"), col_types="ccdcccdddc")
# ear <- bind_rows(ear13,ear14,ear15,ear16,ear17,ear18,ear19)
# save(ear,file="Data/ear.rds")
load("Data/ear.rds")
ownership <- ear %>% filter(QuestionName %in% c("Water System Ownership",
                                    "IsWholesaler")) %>% 
  select(PWSID, 
         Year, 
         QuestionName, 
         QuestionResults,
         QuestionID,
         Pk) %>% 
  pivot_wider(id_cols = c(PWSID, Year),
              names_from = QuestionName,
              values_from = QuestionResults,
              names_repair="unique") %>% unnest(cols=c("Water System Ownership","IsWholesaler") ) %>%
  group_by(PWSID) %>%
  mutate(maxYear = max(Year)) %>%
  ungroup() %>%
  filter(`Water System Ownership` %in% c(
    "Privately owned business (non-community)",
    "Privately owned Mutual Water Company or Association",
    "Privately owned, non-PUC-regulated (Community Water System)",
    "Local Government",
    "Privately owned, PUC-regulated, for profit water company"
    
  )) 
## Warning: Values are not uniquely identified; output will contain list-cols.
## * Use `values_fn = list` to suppress this warning.
## * Use `values_fn = length` to identify where the duplicates arise
## * Use `values_fn = {summary_fun}` to summarise duplicates

Census data

Get all the variables

options(tigris_use_cache = TRUE)
vars <- load_variables(2019, "acs5", cache = TRUE)
census_vars2 <- c(median_income="B19013_001",
                 pop_in_hh_with_income_assistance="B09010_002",
                 pop_in_hh_with_no_income_assistance="B09010_008",
                 hh_inc_5k="B19001_002",
                 hh_inc_12.5k="B19001_003",
                 hh_inc_17.5k="B19001_004",
                 hh_inc_22.5k="B19001_005",
                 hh_inc_27.5k="B19001_006",
                 hh_inc_32.5k="B19001_007",
                 hh_inc_37.5k="B19001_008",
                 hh_inc_42.5k="B19001_009",
                 hh_inc_47.5k="B19001_010",
                 hh_inc_55k="B19001_011",
                 hh_inc_67.5k="B19001_012",
                 hh_inc_87.5k="B19001_013",
                 hh_inc_112.5k="B19001_014",
                 hh_inc_137.5k="B19001_015",
                 hh_inc_175k="B19001_016",
                 hh_inc_gr200k="B19001_017",
                 hh_income_count = "B19001_001")

Lets get all the Municipal boundaries

st <- get_acs(year=2019,geography = "state",
                  variables = "B01003_001", geometry = TRUE) %>% filter(NAME=="California")
## Getting data from the 2015-2019 5-year ACS
pl <- get_acs(year=2019,geography = "place",
                  variables = "B01003_001", geometry = TRUE, state= "CA") 
## Getting data from the 2015-2019 5-year ACS
vars <- load_variables(2019, "acs5", cache = TRUE)

Let’s get all the Census Block Groups

# bg <- get_acs(year=2019,geography = "block group",
#                   variables = census_vars2, geometry = TRUE, state= "CA") 
# save(bg, file="Data/census_blockgroups.rds")
load("Data/census_blockgroups.rds")

Let’s get all the Urbanized Areas and combine them into super areas.

ua <- get_acs(year=2019,geography = "urban area",
                  variables = c("B01003_001","B19013_001"), geometry = TRUE, output="wide")
## Getting data from the 2015-2019 5-year ACS
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ua<-rename(ua,Pop=B01003_001E)
ua <- ua[st,]
## although coordinates are longitude/latitude, st_intersects assumes that they are planar

Descriptive stats

Now we categorize all utilities in the SDWIS Inventory by type

own <- ownership %>% group_by(PWSID,`Water System Ownership`) %>% add_tally() %>% ungroup() %>% group_by(PWSID) %>% add_tally() %>% ungroup()
## Storing counts in `nn`, as `n` already present in input
## ℹ Use `name = "new_name"` to pick a new name.
own1 <- filter(own,n == nn)
own1 <- distinct(own,PWSID,.keep_all=TRUE)
own2 <- filter(own, n != nn)
own2 <- own2 %>% 
  distinct(PWSID, `Water System Ownership`, .keep_all=TRUE) %>%
  group_by(PWSID) %>%
  mutate(maxYear = max(Year)) %>%
  ungroup() %>%
  filter(Year==maxYear) %>% 
  group_by(PWSID) %>%
  add_tally()
## Storing counts in `nnn`, as `n` already present in input
## ℹ Use `name = "new_name"` to pick a new name.
own1 <- select(own1, PWSID, `Water System Ownership`,Year)
own2 <- select(own2, PWSID, `Water System Ownership`,Year)
own <- bind_rows(own1,own2)
own <- own %>% group_by(PWSID) %>% mutate(maxYear=max(Year)) %>% ungroup() %>% filter(Year == maxYear)

rm(own1,own2)


d <- left_join(inv,own, by="PWSID")
d <- select(d, PWSID, PWS_Name, County, Pop, ServConn, `Water System Ownership`)
d$log10PopCat <- floor(log10(d$Pop))

some distributive plots

d <- filter(d,`Water System Ownership` != "Privately owned business (non-community)")
x <- data.frame(table(d$log10PopCat,d$`Water System Ownership`))
x <- x%>%pivot_wider(id_cols = Var1,names_from = Var2, values_from=Freq)

datatable(x,extensions="Buttons",options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv')))
y <- d %>% 
  mutate(TotPop = sum(Pop,na.rm=TRUE),
         PropPop = Pop/TotPop) %>%
  group_by(log10PopCat,`Water System Ownership`) %>% 
  summarise(Population = sum(Pop,na.rm=TRUE),
            Pop_percent = 100*sum(PropPop,na.rm=TRUE)) %>%
  pivot_wider(id_cols = `log10PopCat`, 
              names_from = `Water System Ownership`,
              values_from = c(Population,Pop_percent))
## `summarise()` regrouping output by 'log10PopCat' (override with `.groups` argument)
datatable(y,extensions="Buttons",options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv')))
p<- ggplot(d, aes(x=log10(Pop), color=`Water System Ownership`), size=4) +
  geom_density(lwd=2) + 
  scale_color_viridis_d() + 
  theme_dark() + 
  theme(legend.position=c(0.69, 0.85)) +
  xlab("log(base 10) of Service Population")

p
## Warning: Removed 36 rows containing non-finite values (stat_density).

Spatial demonstration figure. Show utility boundaries, municipal boundaries, and censusblock groups

b <- bounds %>% select(SABL_PWSID)  %>% rename(PWSID=SABL_PWSID)
d.b <- d %>% inner_join(b,by="PWSID") %>% st_as_sf()
pal <- magma(n = length(unique(pl$NAME)), direction = -1)

bkfield <- st_transform(filter(ua,GEOID=="04681"),4326)
d1 <- d.b[bkfield,,op=st_intersects]
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
pl1 <- st_transform(pl,4326)[bkfield,,op=st_intersects]
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
m <-  mapview(pl1,
              stroke=TRUE,
              color="red",
              alpha.regions=0,
              layer.name = "Municipal Boundaries",
              lwd=2,legend=TRUE,col.regions="red") + 
  mapview(d1,
          zcol="Water System Ownership",
          alpha=0.8, layer.name = "Water System Boundaries") 
#mapshot(m,"Figures/Map1.html")
m
d.x <- st_buffer(st_transform(d.b,3310),dist=0)

Bakersfield example

bkfield <- filter(ua,GEOID=="04681")
bg.bounds <- st_transform(bg.bounds,4326)
bkfield <- st_transform(bkfield,4326)
bkfield <- bg.bounds[bkfield,]
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
bk.bound <- d.b[bkfield,,op=st_intersects]
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
m2<-mapview(bkfield,zcol="median_income",layer.name="Census Block Group - Median HH Income") + mapview(bk.bound, alpha=1, layer.name = "Water System Boundaries", stroke=TRUE,color="black",col.regions="black", alpha.regions=0, lwd=4,legend=TRUE)
m2
#mapshot(m2,"Figures/Map2.html")

Calculate affordability

bg.bounds <- st_transform(bg.bounds,3310)
bg.bounds$Area_BlockGroupPWS <- st_area(bg.bounds)
bg.bounds$Percent_BlockGroupinPWS <- bg.bounds$Area_BlockGroupPWS/bg.bounds$AreaBlockGroup


pws.income <- bg.bounds %>% st_drop_geometry() %>%
  left_join(bg,by="GEOID") %>%
  filter(variable %in% c('hh_inc_5k',
                 'hh_inc_12.5k',
                 'hh_inc_17.5k',
                 'hh_inc_22.5k',
                 'hh_inc_27.5k',
                 'hh_inc_32.5k',
                 'hh_inc_37.5k',
                 'hh_inc_42.5k',
                 'hh_inc_47.5k',
                 'hh_inc_55k',
                 'hh_inc_67.5k',
                 'hh_inc_87.5k',
                 'hh_inc_112.5k',
                 'hh_inc_137.5k',
                 'hh_inc_175k',
                 'hh_inc_gr200k')) %>%
  group_by(PWSID,variable) %>%
  summarise(estimate=sum(estimate*Percent_BlockGroupinPWS)) %>% 
  ungroup() %>%
  mutate(var2=case_when(variable=="hh_inc_5k" ~ 1,
                 variable=="hh_inc_12.5k" ~ 2,
                 variable=="hh_inc_17.5k" ~ 3,
                 variable=="hh_inc_22.5k" ~ 4,
                 variable=="hh_inc_27.5k" ~ 5,
                 variable=="hh_inc_32.5k" ~ 6,
                 variable=="hh_inc_37.5k" ~ 7,
                 variable=="hh_inc_42.5k" ~ 8,
                 variable=="hh_inc_47.5k" ~ 9,
                 variable=="hh_inc_55k" ~ 10,
                 variable=="hh_inc_67.5k" ~ 11,
                 variable=="hh_inc_87.5k" ~ 12,
                 variable=="hh_inc_112.5k" ~ 13,
                 variable=="hh_inc_137.5k" ~ 14,
                 variable=="hh_inc_175k" ~ 15,
                 variable=="hh_inc_gr200k" ~ 16,)) %>% 
  arrange(PWSID,var2) %>% 
  group_by(PWSID) %>%
  mutate(cum_inc_sum = cumsum(estimate),
         percentile_inc = 100*cum_inc_sum/sum(estimate)) %>%
  ungroup() %>%
  mutate(perc_dist20=abs(as.numeric(percentile_inc)-20),
         perc_dist50=abs(as.numeric(percentile_inc)-50)) %>%
  group_by(PWSID) %>%
  filter(perc_dist20==min(perc_dist20) | 
         perc_dist50==min(perc_dist50)) %>%
  mutate(percentile=case_when(var2==min(var2)~"20th Percentile",
                              var2==max(var2)~"50th Percentile"),
         income_perc=case_when(var2==1 ~ 5000,
                               var2==2 ~ 12500,
                               var2==3 ~ 17500,
                               var2==4 ~ 22500,
                               var2==5 ~ 27500,
                               var2==6 ~ 32500,
                               var2==7 ~ 37500,
                               var2==8 ~ 42500,
                               var2==9 ~ 47500,
                               var2==10 ~ 55000,
                               var2==11 ~ 67500,
                               var2==12 ~ 87500,
                               var2==13 ~ 112500,
                               var2==14 ~ 137500,
                               var2==15 ~ 175000,
                               var2==16 ~ 225000)
         ) %>%
  pivot_wider(id_cols=PWSID,names_from=percentile,values_from=income_perc)
## `summarise()` regrouping output by 'PWSID' (override with `.groups` argument)
## Warning: Values are not uniquely identified; output will contain list-cols.
## * Use `values_fn = list` to suppress this warning.
## * Use `values_fn = length` to identify where the duplicates arise
## * Use `values_fn = {summary_fun}` to summarise duplicates
pws.income2 <-unnest(pws.income, cols = c(`20th Percentile`, `50th Percentile`, `NA`)) %>%
  select(PWSID,`20th Percentile`,`50th Percentile`) %>% distinct(.keep_all=TRUE)

aff <- left_join(bills,pws.income2,by=c("pwsid"="PWSID"))
aff$AR20 <- 12*100*aff$bill/aff$`20th Percentile`
aff$MHI <- 12*100*aff$bill/aff$`50th Percentile`
aff <- rename(aff,PWSID=pwsid)

aff <- left_join(aff,d.x,by="PWSID")
aff <- st_as_sf(aff)
aff <- distinct(aff,PWSID,`Water System Ownership`,hhsize,.keep_all=TRUE)

Urban-Rural-Income

d.ua <- st_join(d.x,st_transform(ua,3310), join=st_intersects, largest=TRUE)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
d.ua$Pop.y[which(is.na(d.ua$Pop.y))]<-10
d.ua <- distinct(d.ua,PWSID,.keep_all=TRUE)
p<-ggplot(d.ua,aes(x=Pop.y,y=Pop.x, color=`Water System Ownership`)) +
  geom_point() +
  scale_x_log10() + scale_y_log10() + scale_color_viridis_d() + theme_dark() +
  xlab("Population of Urbanized Area") + ylab("Water System Service Population") +
  theme(legend.position=c(0.4,0.9))

png("Figures/Fig2.png")
p
## Warning: Transformation introduced infinite values in continuous y-axis
dev.off()
## quartz_off_screen 
##                 2
p
## Warning: Transformation introduced infinite values in continuous y-axis

inc <- left_join(pws.income2,d.ua,by="PWSID")
# p<-ggplot(inc,aes(x=Pop.y,y=`50th Percentile`, color=`Water System Ownership`, size=Pop.x/1000)) + scale_size_binned("Service Pop. (1000s)", trans="identity", breaks=c(1,10,100,1000,10000)) +
#   geom_point() +
#   scale_x_log10() + scale_y_log10() + scale_color_viridis_d(labels = function(x) str_wrap(x, width=13)) + theme_dark() +
#   xlab("Population of Urbanized Area") + ylab("Median Household Income within Service Area")

inc$cat <- "1. Pop. <1,000 (n=1753)"
inc$cat[which(inc$Pop.x>=1000)] <- "2. Pop. 1,000 - 10,000 (n=456)"
inc$cat[which(inc$Pop.x>=10000)] <- "3. Pop. 10,000 - 100,000 (n=326)"
inc$cat[which(inc$Pop.x>=100000)] <- "4. Pop. >= 100,000 (n=82)"

p<- ggplot(inc, aes(x=`50th Percentile`, color=`Water System Ownership`), size=4) +
  geom_density(lwd=1.24, adjust=2) + 
  scale_color_viridis_d(labels = function(x) str_wrap(x, width=13)) + 
  theme_dark() + scale_x_log10() +
  xlab("Median HH Income") + facet_wrap(vars(cat))

png("Figures/Fig3.png")
p
## Warning: Removed 6 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf
dev.off()
## quartz_off_screen 
##                 2
p
## Warning: Removed 6 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

p<- ggplot(aff, aes(x=`50th Percentile`, color=`Water System Ownership`), size=4) +
  geom_density(lwd=2, adjust=2) + 
  scale_color_viridis_d(labels = function(x) str_wrap(x, width=13)) + 
  theme_dark() + scale_x_log10() +
  xlab("Median HH Income")

png("Figures/Fig3b.png")
p
## Warning: Removed 147 rows containing non-finite values (stat_density).
dev.off()
## quartz_off_screen 
##                 2
p
## Warning: Removed 147 rows containing non-finite values (stat_density).

Big map (urbanized areas and utilities)

m3 <- mapview(d.x,zcol="Water System Ownership", alpha=0.8, layer.name = "All Water System Boundaries") +
  mapview(filter(aff,hhsize==4),stroke=TRUE,color="red",alpha.regions=0, layer.name = "Systems with price information", alpha = 0.5, col.regions="red")
#mapshot(m3,"Figures/Map3.html")
m3

SUmmarize ILI

audit <- read_csv("Data/audit_corrected.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   pwsid = col_character(),
##   PI_INFRASTRUCT_LEAKAGE_INDEX = col_double(),
##   REPORTING_YEAR = col_double(),
##   WATER_SUPPLIER_NAME = col_character(),
##   SUPPLIER_NAME = col_character()
## )
audit <- audit %>% 
  rename(PWSID=pwsid,
         ILI=PI_INFRASTRUCT_LEAKAGE_INDEX) %>%
  group_by(PWSID)  %>%
  distinct(PWSID,REPORTING_YEAR,.keep_all = TRUE) %>%
  summarise(ILI=mean(ILI,na.rm=TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)

Summarize SDWA status

Summarize shutoffs

shutoffs <- ear %>% filter(substr(QuestionName,1,12)=="WR SHUT_OFFS" & Year==2018)

pws_shutoff_collect <- shutoffs %>% filter(QuestionName=="WR SHUT_OFFS Once No Collect" & is.na(QuestionResults))

shutoffs_total <- shutoffs %>%
  filter(QuestionName=="WR SHUT_OFFS Once SF Total" |
           QuestionName=="WR SHUT_OFFS More Than Once SF Total" |
           QuestionName=="WR SHUT_OFFS More Than Once MF Total" |
           QuestionName=="WR SHUT_OFFS Once SF Total") %>%
  group_by(PWSID, Year) %>%
  summarise(shutoffs=sum(as.numeric(QuestionResults),na.rm=TRUE))%>%
  select(PWSID,shutoffs) %>%
  distinct(.keep_all=TRUE) %>% semi_join(pws_shutoff_collect,by="PWSID")
## `summarise()` regrouping output by 'PWSID' (override with `.groups` argument)

Summarize subsidies

subsidies <- ear %>% 
  filter(QuestionName=="WR LL Subsidies") %>% 
  filter(QuestionResults!="-") %>% 
  select(PWSID,QuestionResults,Year) %>% 
  rename(subsidy=QuestionResults) %>% distinct(.keep_all=TRUE) 

subs <- subsidies %>% 
  group_by(PWSID) %>%
  mutate(maxYear=max(Year)) %>%
  ungroup() %>%
  filter(Year == maxYear)

Descriptive Statistics and Plots

make big table

D <- aff 

D <- left_join(D,audit,by="PWSID") # add ILI

D <- left_join(D,v,by="PWSID") #add SDWA violator status
  D$years_serious_violater[which(is.na(D$years_serious_violater))] <- 0

D <- left_join(D, shutoffs_total, by="PWSID") #add shutoffs_total  
D$shutoffs_perc = D$shutoffs/as.numeric(D$ServConn)

D <- left_join(D,subs,by="PWSID") ## add subsidy programs

save(D,file="Data/FinalData.rds")

descriptive stats

rm(list=ls())
load("Data/FinalData.rds")
D.f <- D
D <- D.f
D <- D %>% ungroup() %>% distinct(PWSID,hhsize,.keep_all = TRUE)

D1 <- D%>%filter(hhsize==1) 
D2 <- D%>%filter(hhsize==2) %>% select(PWSID,consumption,bill,AR20) %>% rename(consumption_2=consumption,
                                                                                      bill_hhsize2=bill, AR20_hhsize2 = AR20) %>% st_drop_geometry()
D3 <- D%>%filter(hhsize==3) %>% select(PWSID,consumption,bill,AR20) %>% rename(consumption_3=consumption,
                                                                                      bill_hhsize3=bill, AR20_hhsize3 = AR20)  %>% st_drop_geometry()
D4 <- D%>%filter(hhsize==4) %>% select(PWSID,consumption,bill,AR20)%>% rename(consumption_4=consumption,
                                                                                      bill_hhsize4=bill, AR20_hhsize4 = AR20)  %>% st_drop_geometry()
D5 <- D%>%filter(hhsize==5) %>% select(PWSID,consumption,bill,AR20)%>% rename(consumption_5=consumption,
                                                                                      bill_hhsize5=bill, AR20_hhsize5 = AR20) %>% st_drop_geometry()
D6 <- D%>%filter(hhsize==6) %>% select(PWSID,consumption,bill,AR20)%>% rename(consumption_6=consumption,
                                                                                      bill_hhsize6=bill, AR20_hhsize6 = AR20) %>% st_drop_geometry()
D7 <- D%>%filter(hhsize==7) %>% select(PWSID,consumption,bill,AR20)%>% rename(consumption_7=consumption,
                                                                                      bill_hhsize7=bill, AR20_hhsize7 = AR20) %>% st_drop_geometry()



D.wide <- D1 %>% 
  left_join(D2,by="PWSID") %>%
  left_join(D3,by="PWSID") %>%
  left_join(D4,by="PWSID") %>%
  left_join(D5,by="PWSID") %>%
  left_join(D6,by="PWSID") %>%
  left_join(D7,by="PWSID") 

D.wide$geometry <- NULL
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:scales':
## 
##     alpha, rescale
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
D.wide <- filter(D.wide,!is.na(AR20))
D.wide2 <- D.wide %>% select(PWSID,Pop,`Water System Ownership`, 
                  `20th Percentile`, `50th Percentile`,
                 ILI, years_serious_violater, shutoffs_perc,subsidy,
                 bill_type,bill,bill_hhsize2,bill_hhsize3,bill_hhsize4,bill_hhsize5,bill_hhsize6,bill_hhsize7,
                 AR20,AR20_hhsize2,AR20_hhsize3,AR20_hhsize4,AR20_hhsize5,AR20_hhsize6,AR20_hhsize7)

D.wide2$bill_type[which(D.wide2$bill_type %in% c("1.49*usage_ccf","3.04*usage_ccf*1.333"))] <- "Uniform"

st(D.wide2, group='Water System Ownership', group.test=TRUE, digits=2)
Summary Statistics
Water System Ownership
Local Government
Privately owned Mutual Water Company or Association
Privately owned, non-PUC-regulated (Community Water System)
Privately owned, PUC-regulated, for profit water company
Variable N Mean SD N Mean SD N Mean SD N Mean SD Test
Pop 314 83750.68 266046.94 14 17393.86 15073.31 9 32356.22 62079.97 69 73160.33 131730.3 F=0.471
20th Percentile 314 29920.38 12697.07 14 34821.43 15268.88 9 30277.78 12018.5 69 28659.42 14517.86 F=0.868
50th Percentile 314 67914.01 25586.33 14 73571.43 33793.98 9 68611.11 27332.19 69 66195.65 31890.81 F=0.297
ILI 265 2.04 1.96 10 1.33 0.94 4 0.98 1.2 49 1.85 1.47 F=0.96
years_serious_violater 314 0.12 0.43 14 0.29 1.07 9 0 0 69 0.03 0.17 F=1.862
shutoffs_perc 247 0.03 0.05 10 0.02 0.03 7 0.01 0.02 40 0.02 0.02 F=0.993
subsidy 312 13 6 69 X2=99.397***
… No 223 71% 12 92% 3 50% 6 9%
… Yes 89 29% 1 8% 3 50% 63 91%
bill_type 314 14 9 69 X2=52.369***
… 0 0 0% 1 7% 0 0% 0 0%
… Budget 14 4% 0 0% 1 11% 0 0%
… Tiered 205 65% 11 79% 8 89% 63 91%
… Uniform 95 30% 2 14% 0 0% 6 9%
bill 314 30.26 14.8 14 35.2 26.44 9 46.47 24.95 69 31.93 11.98 F=3.816**
bill_hhsize2 314 35.72 16.72 14 38.7 27.31 9 50.73 23.61 69 39.18 14.37 F=2.957**
bill_hhsize3 314 41.49 19.76 14 42.2 28.48 9 54.99 23.05 69 46.6 17.23 F=2.451*
bill_hhsize4 314 47.56 23.51 14 45.82 30.01 9 59.56 23.64 69 54.1 20.43 F=2.212*
bill_hhsize5 314 54.02 28.1 14 49.85 31.62 9 64.68 25.52 69 62.13 25.18 F=2.14*
bill_hhsize6 314 60.77 33.1 14 54.04 33.43 9 70.33 27.42 69 70.3 30.56 F=2.097
bill_hhsize7 314 67.72 38.51 14 58.45 35.46 9 76.15 29.78 69 78.87 36.43 F=2.125*
AR20 314 1.41 0.9 14 1.37 1.16 9 2.48 2.48 69 1.81 1.64 F=4.71***
AR20_hhsize2 314 1.65 1.02 14 1.5 1.17 9 2.63 2.41 69 2.2 1.98 F=5.096***
AR20_hhsize3 314 1.9 1.17 14 1.62 1.2 9 2.77 2.35 69 2.59 2.33 F=5.392***
AR20_hhsize4 314 2.16 1.36 14 1.76 1.24 9 2.94 2.32 69 2.99 2.68 F=5.539***
AR20_hhsize5 314 2.44 1.58 14 1.92 1.29 9 3.14 2.37 69 3.41 3.05 F=5.661***
AR20_hhsize6 314 2.74 1.81 14 2.09 1.34 9 3.36 2.41 69 3.84 3.43 F=5.689***
AR20_hhsize7 314 3.04 2.06 14 2.27 1.4 9 3.59 2.45 69 4.28 3.82 F=5.734***
Statistical significance markers: * p<0.1; ** p<0.05; *** p<0.01
D.wide3 <- filter(D.wide2,`Water System Ownership` %in% c("Local Government","Privately owned, PUC-regulated, for profit water company"))

st(D.wide3, group='Water System Ownership', group.test=TRUE, digits=2)
Summary Statistics
Water System Ownership
Local Government
Privately owned, PUC-regulated, for profit water company
Variable N Mean SD N Mean SD Test
Pop 314 83750.68 266046.94 69 73160.33 131730.3 F=0.104
20th Percentile 314 29920.38 12697.07 69 28659.42 14517.86 F=0.529
50th Percentile 314 67914.01 25586.33 69 66195.65 31890.81 F=0.232
ILI 265 2.04 1.96 49 1.85 1.47 F=0.415
years_serious_violater 314 0.12 0.43 69 0.03 0.17 F=3.076*
shutoffs_perc 247 0.03 0.05 40 0.02 0.02 F=1.653
subsidy 312 69 X2=90.27***
… No 223 71% 6 9%
… Yes 89 29% 63 91%
bill_type 314 69 X2=18.519***
… Budget 14 4% 0 0%
… Tiered 205 65% 63 91%
… Uniform 95 30% 6 9%
bill 314 30.26 14.8 69 31.93 11.98 F=0.772
bill_hhsize2 314 35.72 16.72 69 39.18 14.37 F=2.54
bill_hhsize3 314 41.49 19.76 69 46.6 17.23 F=3.959**
bill_hhsize4 314 47.56 23.51 69 54.1 20.43 F=4.588**
bill_hhsize5 314 54.02 28.1 69 62.13 25.18 F=4.882**
bill_hhsize6 314 60.77 33.1 69 70.3 30.56 F=4.815**
bill_hhsize7 314 67.72 38.51 69 78.87 36.43 F=4.836**
AR20 314 1.41 0.9 69 1.81 1.64 F=7.64***
AR20_hhsize2 314 1.65 1.02 69 2.2 1.98 F=11.031***
AR20_hhsize3 314 1.9 1.17 69 2.59 2.33 F=13.046***
AR20_hhsize4 314 2.16 1.36 69 2.99 2.68 F=13.874***
AR20_hhsize5 314 2.44 1.58 69 3.41 3.05 F=14.356***
AR20_hhsize6 314 2.74 1.81 69 3.84 3.43 F=14.397***
AR20_hhsize7 314 3.04 2.06 69 4.28 3.82 F=14.441***
Statistical significance markers: * p<0.1; ** p<0.05; *** p<0.01
D.wide3$subsidy[which(is.na(D.wide3$subsidy))] <- "NA"

f4<-ggplot(filter(D.wide2,Pop>0), aes(x=Pop,y=AR20_hhsize4,color=`Water System Ownership`, shape=subsidy, cex=2))  + geom_hline(yintercept=3.5, color="green", lwd=2)+ geom_point(size=2) + scale_x_log10() + theme_dark() + scale_color_viridis_d() + theme(legend.position=c(0.7,0.6)) + xlab("Service Population (log10 scale)") + ylab("AR20 for 4-person Household")  # + geom_smooth(formula=y~log10(x),method='lm')
png("Figures/Figure4.png")
f4
## Warning: Removed 6 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2
f4
## Warning: Removed 6 rows containing missing values (geom_point).

f5 <- ggplot(D.wide3, aes(x=AR20_hhsize4, color=`Water System Ownership`), size=4) +
  geom_density(lwd=2) + 
  scale_color_viridis_d() + 
  theme_dark() + 
  xlab("AR20")


f6 <- ggplot(D.wide3, aes(x=shutoffs_perc*100, color=`Water System Ownership`), size=4) +
  geom_density(lwd=2) + 
  scale_color_viridis_d() + 
  theme_dark() + 
  xlab("Shutoff rate")

f7 <- ggplot(D.wide3, aes(x=ILI, color=`Water System Ownership`), size=4) +
  geom_density(lwd=2) + 
  scale_color_viridis_d() + 
  theme_dark() + 
  xlab("ILI")

f8 <- ggplot(D.wide3, aes(x=`years_serious_violater`, color=`Water System Ownership`), size=4) +
  geom_density(lwd=2) + 
  scale_color_viridis_d() + 
  theme_dark() + 
  xlab("Years in which USEPA has labelled serious SDWA Violator")

F5 <- ggarrange(f5,f6,f7,f8, labels = c("A: AR20 (HH size 4)", "B: Shutoff Rate (%)","C: ILI", "D: SDWA Violation"),
                common.legend=TRUE, legend="bottom")
## Warning: Removed 96 rows containing non-finite values (stat_density).
## Warning: Removed 69 rows containing non-finite values (stat_density).
F5

inv <- read_excel("Data/EAR/inventory.xlsx",
                  sheet = "SDWISWaterSystemInventory071620") 

inv <- select(inv,PWSID,County)

D.wide2 <- left_join(D.wide2,inv,by="PWSID") 
D.wide3 <- left_join(D.wide3,inv,by="PWSID") 
D.wide3 <- filter(D.wide3,Pop>0)
D.wide3$shutoffs_perc <- 100*D.wide3$shutoffs_perc

myfelmAR <- function(data){

list.B <- list()
list.B$B2.2 <- felm(data=data,AR20_hhsize2 ~ `Water System Ownership` + log(Pop)  + `50th Percentile`|0|0|County)
list.B$B3.2 <- felm(data=data,AR20_hhsize3 ~ `Water System Ownership` + log(Pop) + `50th Percentile` |0|0|County)
list.B$B4.2 <- felm(data=data,AR20_hhsize4 ~ `Water System Ownership` + log(Pop) + `50th Percentile` |0|0|County)
list.B$B5.2 <- felm(data=data,AR20_hhsize5 ~ `Water System Ownership` + log(Pop) + `50th Percentile` |0|0|County)
list.B$B6.2 <- felm(data=data,AR20_hhsize6 ~ `Water System Ownership` + log(Pop) + `50th Percentile` |0|0|County)
list.B$S2.2 <- felm(data=filter(data,subsidy!="NA"),shutoffs_perc ~ `Water System Ownership` + log(Pop)  + `50th Percentile` + AR20_hhsize4 + subsidy|0|0|County)
return(list.B)
}  

myfelmILI <- function(data){

list.B <- list()
list.B$B2.1 <- felm(data=data,ILI ~ log(Pop) + `50th Percentile` +  bill_hhsize4|0|0|County)
list.B$B2.2 <- felm(data=data,ILI ~ `Water System Ownership` + log(Pop)  + `50th Percentile` + bill_hhsize4|0|0|County)
list.B$B2.3 <- felm(data=data,ILI ~ `Water System Ownership` + log(Pop)  + `50th Percentile` + bill_hhsize4|County|0|County)


return(list.B)
}  

myfelmV <- function(data){

list.B <- list()
list.B$B2.1 <- felm(data=data,`years_serious_violater` ~ log(Pop) + `50th Percentile` +  bill_hhsize4|0|0|County)
list.B$B2.2 <- felm(data=data,`years_serious_violater` ~ `Water System Ownership` + log(Pop)  + `50th Percentile` + bill_hhsize4|0|0|County)
list.B$B2.3 <- felm(data=data,`years_serious_violater` ~ `Water System Ownership` + log(Pop)  + `50th Percentile` + bill_hhsize4|County|0|County)


return(list.B)
}  

myfelmSO <- function(data){

list.B <- list()
list.B$B2.1 <- felm(data=filter(data,subsidy!="NA"),shutoffs_perc ~ log(Pop) + `50th Percentile` + AR20_hhsize4 + subsidy |0|0|County)
list.B$B2.2 <- felm(data=filter(data,subsidy!="NA"),shutoffs_perc ~ `Water System Ownership` + log(Pop)  + `50th Percentile` + AR20_hhsize4 + subsidy|0|0|County)
list.B$B2.3 <- felm(data=filter(data,subsidy!="NA"),shutoffs_perc ~ `Water System Ownership` + log(Pop)  + `50th Percentile`  + AR20_hhsize4 + subsidy|County|0|County)


return(list.B)
}  


myfelmP <- function(data){
  
list.B <- list()
  list.B$I2.2 <- felm(data=data,ILI ~ `Water System Ownership` + log(Pop)  + `50th Percentile` + bill_hhsize4|0|0|County)
  list.B$V2.2 <- felm(data=data,`years_serious_violater` ~ `Water System Ownership` + log(Pop)  + `50th Percentile` + bill_hhsize4|0|0|County)
  return(list.B)
}

Regressions

l1 <- myfelmAR(D.wide3)
l2 <- myfelmSO(D.wide3)
l3 <- myfelmILI(D.wide3)
l4 <- myfelmV(D.wide3)

l5 <- myfelmAR(D.wide3)
lP <-myfelmP(D.wide3)

stargazer(l1,type="html",out="Tables/AR20Regression.html")
Dependent variable:
AR20_hhsize2 AR20_hhsize3 AR20_hhsize4 AR20_hhsize5 AR20_hhsize6 shutoffs_perc
(1) (2) (3) (4) (5) (6)
Water System OwnershipPrivately owned, PUC-regulated, for profit water company 0.397* 0.525** 0.640** 0.761** 0.877** -1.266**
(0.219) (0.265) (0.312) (0.364) (0.416) (0.597)
log(Pop) -0.317*** -0.355*** -0.397*** -0.438*** -0.480*** -0.572**
(0.096) (0.117) (0.139) (0.161) (0.184) (0.243)
50th Percentile -0.00002*** -0.00002*** -0.00002*** -0.00002*** -0.00003*** -0.00001
(0.00000) (0.00000) (0.00000) (0.00000) (0.00001) (0.00001)
AR20_hhsize4 0.698***
(0.207)
subsidyYes -0.539
(0.420)
Constant 6.232*** 7.012*** 7.837*** 8.678*** 9.535*** 8.335***
(1.133) (1.358) (1.595) (1.841) (2.094) (2.774)
Observations 382 382 382 382 382 285
R2 0.361 0.333 0.308 0.283 0.261 0.134
Adjusted R2 0.356 0.328 0.302 0.277 0.255 0.119
Residual Std. Error 1.012 (df = 378) 1.207 (df = 378) 1.425 (df = 378) 1.667 (df = 378) 1.923 (df = 378) 4.370 (df = 279)
Note: p<0.1; p<0.05; p<0.01
stargazer(l2,type="html",out="Tables/SORegression.html")
Dependent variable:
shutoffs_perc
(1) (2) (3)
Water System OwnershipPrivately owned, PUC-regulated, for profit water company -1.266** -0.502
(0.597) (0.465)
log(Pop) -0.516** -0.572** -0.718**
(0.260) (0.243) (0.301)
50th Percentile -0.00001 -0.00001 0.00001
(0.00001) (0.00001) (0.00001)
AR20_hhsize4 0.674*** 0.698*** 0.762**
(0.208) (0.207) (0.346)
subsidyYes -0.949** -0.539 -0.591
(0.454) (0.420) (0.358)
Constant 7.783*** 8.335***
(2.941) (2.774)
Observations 285 285 285
R2 0.127 0.134 0.395
Adjusted R2 0.115 0.119 0.266
Residual Std. Error 4.379 (df = 280) 4.370 (df = 279) 3.988 (df = 234)
Note: p<0.1; p<0.05; p<0.01
stargazer(l3,type="html",out="Tables/ILIRegression.html")
Dependent variable:
ILI
(1) (2) (3)
Water System OwnershipPrivately owned, PUC-regulated, for profit water company -0.030 -0.001
(0.165) (0.219)
log(Pop) -0.228** -0.227** -0.144*
(0.097) (0.097) (0.082)
50th Percentile -0.00001* -0.00001* 0.00000
(0.00000) (0.00000) (0.00000)
bill_hhsize4 -0.019*** -0.019*** -0.015**
(0.006) (0.006) (0.007)
Constant 5.893*** 5.884***
(1.272) (1.269)
Observations 314 314 314
R2 0.082 0.082 0.395
Adjusted R2 0.073 0.070 0.283
Residual Std. Error 1.817 (df = 310) 1.820 (df = 309) 1.598 (df = 264)
Note: p<0.1; p<0.05; p<0.01
stargazer(l4,type="html",out="Tables/VRegression.html")
Dependent variable:
years_serious_violater
(1) (2) (3)
Water System OwnershipPrivately owned, PUC-regulated, for profit water company -0.094* -0.105
(0.049) (0.067)
log(Pop) -0.008 -0.010 0.004
(0.010) (0.010) (0.011)
50th Percentile -0.00000** -0.00000** 0.00000
(0.00000) (0.00000) (0.00000)
bill_hhsize4 -0.001 -0.001 0.001
(0.001) (0.001) (0.001)
Constant 0.315** 0.345**
(0.128) (0.144)
Observations 382 382 382
R2 0.013 0.021 0.296
Adjusted R2 0.005 0.011 0.188
Residual Std. Error 0.395 (df = 378) 0.394 (df = 377) 0.357 (df = 330)
Note: p<0.1; p<0.05; p<0.01
stargazer(l5,type="html",out="Tables/ERegression.html")
Dependent variable:
AR20_hhsize2 AR20_hhsize3 AR20_hhsize4 AR20_hhsize5 AR20_hhsize6 shutoffs_perc
(1) (2) (3) (4) (5) (6)
Water System OwnershipPrivately owned, PUC-regulated, for profit water company 0.397* 0.525** 0.640** 0.761** 0.877** -1.266**
(0.219) (0.265) (0.312) (0.364) (0.416) (0.597)
log(Pop) -0.317*** -0.355*** -0.397*** -0.438*** -0.480*** -0.572**
(0.096) (0.117) (0.139) (0.161) (0.184) (0.243)
50th Percentile -0.00002*** -0.00002*** -0.00002*** -0.00002*** -0.00003*** -0.00001
(0.00000) (0.00000) (0.00000) (0.00000) (0.00001) (0.00001)
AR20_hhsize4 0.698***
(0.207)
subsidyYes -0.539
(0.420)
Constant 6.232*** 7.012*** 7.837*** 8.678*** 9.535*** 8.335***
(1.133) (1.358) (1.595) (1.841) (2.094) (2.774)
Observations 382 382 382 382 382 285
R2 0.361 0.333 0.308 0.283 0.261 0.134
Adjusted R2 0.356 0.328 0.302 0.277 0.255 0.119
Residual Std. Error 1.012 (df = 378) 1.207 (df = 378) 1.425 (df = 378) 1.667 (df = 378) 1.923 (df = 378) 4.370 (df = 279)
Note: p<0.1; p<0.05; p<0.01
stargazer(lP,type="html",out="Tables/PRegression.html")
Dependent variable:
ILI years_serious_violater
(1) (2)
Water System OwnershipPrivately owned, PUC-regulated, for profit water company -0.030 -0.094*
(0.165) (0.049)
log(Pop) -0.227** -0.010
(0.097) (0.010)
50th Percentile -0.00001* -0.00000**
(0.00000) (0.00000)
bill_hhsize4 -0.019*** -0.001
(0.006) (0.001)
Constant 5.884*** 0.345**
(1.269) (0.144)
Observations 314 382
R2 0.082 0.021
Adjusted R2 0.070 0.011
Residual Std. Error 1.820 (df = 309) 0.394 (df = 377)
Note: p<0.1; p<0.05; p<0.01
stargazer(l1)
% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu % Date and time: Sun, Dec 27, 2020 - 13:35:37
stargazer(l2)
% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu % Date and time: Sun, Dec 27, 2020 - 13:35:38
stargazer(l3)
% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu % Date and time: Sun, Dec 27, 2020 - 13:35:38
stargazer(l4)
% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu % Date and time: Sun, Dec 27, 2020 - 13:35:38
stargazer(l5)
% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu % Date and time: Sun, Dec 27, 2020 - 13:35:38